### Loader机制
#
# - 加载markdown
# - 加载csv
# - 加载文件目录
# - 加载html
# - 加载JSON
# - 加载PDF

# Load a Markdown file as plain text with TextLoader.
from langchain_community.document_loaders import TextLoader

# The file is read as UTF-8; load() returns the loaded document(s).
loader = TextLoader(file_path="loader.md", encoding="utf-8")
res = loader.load()
print(res)


# Load a CSV file with CSVLoader.
from langchain_community.document_loaders.csv_loader import CSVLoader

# Minimal form, without a source column:
#   loader = CSVLoader(file_path="loader.csv")
# Here the "Location" column is passed as source_column; see the CSVLoader
# documentation for how that column is used in each document's metadata.
loader = CSVLoader(
    file_path="loader.csv",
    source_column="Location",
    encoding="utf-8",
)
data = loader.load()
print(data)

# Load Excel workbooks from a directory with DirectoryLoader.
#
# Setup notes:
#   pip install "unstructured[xlsx]"
# If loading fails with
#   "ImportError: failed to find libmagic. Check your installation"
# the workaround recorded here is:
#   pip uninstall python-magic
#   pip install python-magic-bin==0.4.14

from langchain_community.document_loaders import DirectoryLoader

# Usage: DirectoryLoader("<directory path>", glob="<pattern of files to load>")
# Only files matching the glob are picked up, so .html and .rst files in the
# directory are not loaded by this loader.
loader = DirectoryLoader(path="./example/", glob="*.xlsx")
docs = loader.load()
# Print the count explicitly: a bare `len(docs)` expression only displays a
# value in a notebook/REPL and is silently discarded when run as a script.
print(len(docs))
print(docs)


# Load an HTML file with BSHTMLLoader.
#
# Alternative loader that was tried here:
#   from langchain.document_loaders import UnstructuredHTMLLoader
#   loader = UnstructuredHTMLLoader("loader.html")
from langchain_community.document_loaders import BSHTMLLoader

# The HTML file is opened as UTF-8.
loader = BSHTMLLoader("loader.html", open_encoding="utf-8")
data = loader.load()
# Print the result like the other loader sections do; a bare `data`
# expression shows nothing when this file is run as a script.
print(data)

# Load a JSON file with JSONLoader.
# Requires the `jq` package to be installed first: pip install jq

from langchain_community.document_loaders import JSONLoader

# jq_schema is the jq expression applied to the file; ".template" selects
# the top-level "template" field of simple_prompt.json.
loader = JSONLoader(
    file_path="simple_prompt.json",
    jq_schema=".template",
    text_content=True,
)
data = loader.load()
print(data)


# Load a PDF file with PyPDFLoader.
# Requires: pip install pypdf

from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader(file_path="loader.pdf")
pages = loader.load_and_split()
# Show only the first returned document.
print(pages[0])